package com.shujia.spark.sql

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

/**
  * Spark SQL job intended to be launched with `spark-submit`.
  *
  * Reads student records from a CSV source, counts the records per gender and
  * writes the result back out as CSV, overwriting any previous output.
  */
object Demo9Submit {

  /** Input location used when no path is given on the command line. */
  private val DefaultInputPath = "/data/student"

  /** Output location used when no path is given on the command line. */
  private val DefaultOutputPath = "/data/gender_num"

  /**
    * Entry point.
    *
    * @param args optional positional arguments:
    *             `args(0)` - input path (defaults to `/data/student`),
    *             `args(1)` - output path (defaults to `/data/gender_num`).
    *             Paths without a scheme are resolved against the cluster's
    *             default filesystem (presumably HDFS here - confirm against
    *             the deployment configuration).
    */
  def main(args: Array[String]): Unit = {
    val inputPath: String = args.headOption.getOrElse(DefaultInputPath)
    val outputPath: String = args.drop(1).headOption.getOrElse(DefaultOutputPath)

    val spark: SparkSession = SparkSession
      .builder()
      // No master is set here on purpose: when submitting to a cluster the
      // master must come from spark-submit. Uncomment for a local run.
      //      .master("local")
      .appName("submit")
      .getOrCreate()

    // Always release the session, even if the job fails part-way through.
    try {
      import spark.implicits._
      import org.apache.spark.sql.functions._

      // Read the student data with an explicit schema (no header inference).
      val studentDF: DataFrame = spark
        .read
        .format("csv")
        .option("sep", ",")
        .schema("id STRING,name STRING,age  INT,gender STRING,clazz STRING")
        .load(inputPath)

      // Number of students per gender.
      // NOTE(review): count(column) skips nulls, so a group whose gender is
      // null would be reported with c = 0 - confirm that this is intended.
      val genderNumDF: DataFrame = studentDF
        .groupBy($"gender")
        .agg(count($"gender") as "c")

      // Save the aggregated result, replacing any existing output.
      genderNumDF
        .write
        .format("csv")
        .option("sep", ",")
        .mode(SaveMode.Overwrite)
        .save(outputPath)
    } finally {
      spark.stop()
    }
  }

}
